""" Do some final processing on the contracts.

"""
import pandas as pd
import numpy as np

#%% READ IN STATE DATA ------------------------------------------------------------------
# The state tables do not depend on `sidetrack`, so they are read and prepared once,
# before the loop, instead of being re-read from disk on every iteration.
df_states_by_time = dict()
for i in ['month', 'fortnight']:
    df_states_by_time[i] = pd.read_csv(f'./data_py/processed/states_{i}.csv', index_col=[0])
    df_states_by_time[i]['date'] = pd.to_datetime(df_states_by_time[i]['date'])
# Mean of the monthly `gas` column; used below for the `boom` flag and to scale `bid`.
mean_gas = df_states_by_time['month']['gas'].mean()
# Drop the state table's own `month` column so it cannot collide with the contracts'
# `month` column in the merge (the monthly merge uses empty suffixes on both sides).
df_states_by_time['month'] = df_states_by_time['month'].drop('month', axis=1)

# Long version of the monthly states; merged in near the end of each iteration.
df_state_long = pd.read_csv('./data_py/processed/states_month_long.csv',
                            index_col=[0])
# Every column of the long state table except the merge key `month`.
cols = list(df_state_long.columns)
cols.remove('month')
df_state_long['month'] = pd.to_datetime(df_state_long['month'])

for sidetrack in ['_no_deepening', '']:
    #%% READ IN CONTRACTS ---------------------------------------------------------------
    df_contracts = pd.read_csv(
        f'./data_py/temp/06_merge_contracts_wells/contracts_merged{sidetrack}.csv', index_col=[0])

    for date_col in ['fixture_date', 'start', 'end']:
        df_contracts[date_col] = pd.to_datetime(df_contracts[date_col])

    # 'YYYY-MM' strings, reused for both the month and the fortnight keys.
    fixture_ym = df_contracts['fixture_date'].dt.strftime('%Y-%m')
    end_ym = df_contracts['end'].dt.strftime('%Y-%m')

    # Month keys: first day of the month of the fixture date / end date.
    df_contracts['month'] = pd.to_datetime(fixture_ym)
    df_contracts['month_end'] = pd.to_datetime(end_ym)

    # Fortnight keys: the 1st of the month for days 1-14, the 15th otherwise.
    s = np.where(df_contracts['fixture_date'].dt.day < 15, '-01', '-15')
    df_contracts['fortnight'] = pd.to_datetime(fixture_ym + s)

    s = np.where(df_contracts['end'].dt.day < 15, '-01', '-15')
    df_contracts['fortnight_end'] = pd.to_datetime(end_ym + s)

    #%% COMBINE CONTRACTS AND STATE DATA ----------------------------------------------------
    # For each time unit, attach the state at the fixture date and the state at the
    # contract end date. Monthly state columns keep their plain names; fortnight columns
    # get a '_fortnight' suffix; end-date columns additionally get '_end'.
    for i, suffix in [('month', ''), ('fortnight', '_fortnight')]:
        df_contracts = df_contracts.merge(
            df_states_by_time[i],
            left_on=i,
            right_on='date',
            how='left',
            suffixes=('', suffix)
        )
        df_contracts = df_contracts.merge(
            df_states_by_time[i],
            left_on=f'{i}_end',
            right_on='date',
            how='left',
            suffixes=('', f'{suffix}_end')
        )
        if i == 'month':
            # Assigned once, right after the monthly merges, so the column keeps its
            # position ahead of the fortnight columns. The unsuffixed `gas` column is the
            # monthly one and is not renamed by the later merges (left suffix is empty),
            # so recomputing after the fortnight merges would give the same values.
            df_contracts['boom'] = (df_contracts['gas'] >= mean_gas)

    #%% CLEAN UP BID DATA AND VALUE DATA ----------------------------------------------------
    # Divide by 30 here since want to convert monthly well value --> daily (all other prices
    # e.g. dayrate are in daily units - so this is just to make it comparable)
    df_contracts['value'] = (df_contracts['bid'] / mean_gas) / (30 * 1000000)
    df_contracts['day_rate'] = df_contracts['day_rate'] / 1000000

    # %% GET FINAL CONTRACTS ------------------------------------------------------------
    # Only the main (non-sidetrack) run writes the version saved before imputation
    # and before rows are dropped.
    if sidetrack == '':
        df_contracts.to_csv('./data_py/processed/contracts_final_no_delete.csv')

    # IMPUTE VALUE IF MISSING
    # Missing values are filled with the mean `value` of contracts fixed in 2009 or earlier.
    fixed_by_2009 = df_contracts['fixture_date'].dt.year <= 2009
    df_contracts['value'] = df_contracts['value'].fillna(
        df_contracts.loc[fixed_by_2009, 'value'].mean()
    )

    # Keep only contracts with a non-missing `mri`.
    df_contracts_final = df_contracts.dropna(subset=['mri'])

    # DROP INDEXED
    # df_contracts_final = df_contracts_final[df_contracts_final['type'] != 'Indexed']

    # Add in some states post 2010
    # Replace the state columns merged above with the ones from the long state table.
    df_contracts_final = df_contracts_final.drop(columns=cols)
    df_contracts_final = df_contracts_final.merge(
        df_state_long,
        left_on='month',
        right_on='month',
        how='left'
    )

    #%% GET DISTANCE BETWEEN SUBSEQUENT CONTRACTS ---------------------------------------
    def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371):
        """
        Great-circle distance between two points, in hundreds of miles.

        Slightly modified version of http://stackoverflow.com/a/29546836/2901002

        Parameters
        ----------
        lat1, lon1 : scalar or array-like
            Coordinates of the first point(s).
        lat2, lon2 : scalar or array-like
            Coordinates of the second point(s).
        to_radians : bool, default True
            If True the coordinates are taken to be decimal degrees and are converted
            to radians first; if False they must already be in radians.
        earth_radius : float, default 6371
            Sphere radius. The result is multiplied by 0.621371 (km -> miles), so this
            should be given in kilometres.

        Returns
        -------
        Distance(s) in hundreds of miles, with the broadcast shape of the inputs.
        NaN coordinates give NaN distances.

        All (lat, lon) coordinates must have numeric dtypes and be of equal length
        (or broadcastable).
        """
        if to_radians:
            lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

        a = np.sin((lat2 - lat1) / 2.0) ** 2 + \
            np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2

        # Rounding can push `a` marginally above 1 for (near-)antipodal points, which
        # would make arcsin return NaN; clamp it to the valid domain. NaNs pass through.
        a = np.clip(a, 0.0, 1.0)

        # Return in 100s of miles
        return earth_radius * 2 * np.arcsin(np.sqrt(a)) * 0.621371 / 100


    # Get dist.; include 1[new contract with previous location available] * distance
    #
    # Distance from the previous row's surface location to this row's. Full-length
    # shifted arrays are used (rather than `shift().dropna()` against a `.loc[1:]`
    # slice) so the computation does not depend on the index labels and a missing
    # coordinate produces a NaN distance instead of a length-mismatch error.
    # NOTE(review): "previous" means the previous row of the frame; this presumably
    # relies on each rig's contracts being stored contiguously and in order - confirm.
    latitude = df_contracts_final['surf_latitude']
    longitude = df_contracts_final['surf_longitude']
    df_contracts_final['dist'] = haversine(
        latitude.shift().to_numpy(),
        longitude.shift().to_numpy(),
        latitude.to_numpy(),
        longitude.to_numpy(),
    )

    # The first row of each rig has no previous location for that rig.
    first_row_of_rig = df_contracts_final.groupby('rig_name').rig_name.cumcount() == 0
    df_contracts_final.loc[first_row_of_rig, 'dist'] = np.nan

    # include_dist = 1 only where a distance is available and `reneg` equals 0;
    # everywhere else the distance is zeroed out so that include_dist * dist == dist.
    df_contracts_final['include_dist'] = 1 * (
        df_contracts_final['dist'].notna()
        & (df_contracts_final['reneg'] == 0)
    )
    df_contracts_final.loc[df_contracts_final['include_dist'] == 0, 'dist'] = 0.0

    #%% SAVE ----------------------------------------------------------------------------
    # Full sample, then the subsample of contracts starting in 2009 or earlier.
    df_contracts_final.to_csv(f'./data_py/processed/contracts_final_long{sidetrack}.csv')
    df_contracts_to_2009 = df_contracts_final[df_contracts_final['start'].dt.year <= 2009]
    df_contracts_to_2009.to_csv(f'./data_py/processed/contracts_final{sidetrack}.csv')
